/*
Look at differences in long-run outcomes by group

Same mapping as A6:
1, 9, 10, 12 => Lipitor, Crestor, Vytorin, generic Zocor
*/

// Session setup: do not pause output for --more--
set more off

// Load the user-by-quarter choice file, replacing whatever is in memory
use user_quarter_choice_cholesterol, clear



// focus on cholesterol
// (tabulate first so the log shows which indic_id values were present)
tabulate indic_id
drop if indic_id != 5


// BASIC: LOOK AT IMPACT OF FIRST CHOICE ON LATER CHOICE
// Earliest quarter observed for each enrollee
egen first_q = min(quarter), by(enrolid)

// For each drug attribute, take the value observed in the enrollee's first
// quarter and spread it over all of that enrollee's rows:
//   generic_id -> first_choice, generic -> first_generic, xr -> first_xr
local attrs generic_id generic xr
local stubs choice generic xr
forvalues k = 1/3 {
	local attr : word `k' of `attrs'
	local stub : word `k' of `stubs'
	gen first_`stub'_temp = `attr' if quarter == first_q
	egen first_`stub' = max(first_`stub'_temp), by(enrolid)
}


// Quarters elapsed since the enrollee's first observed quarter
gen period = quarter - first_q

// 1 when the row's drug matches the first-quarter drug on all three attributes
gen chose_initial = (generic_id == first_choice) ///
	& (generic == first_generic) ///
	& (xr == first_xr)


// Attach the per-user-quarter drug aggregate. Rows already in memory are all
// kept (matched or not); rows that exist only in the using file are discarded.
merge 1:1 enrolid quarter using "cholesterol_user_drugs_user_aggregate", keep(master match) nogenerate

// A user-quarter with no record in the aggregate file gets zeros
foreach v of varlist distinct_drugs non_cholesterol {
	replace `v' = 0 if `v' == .
}


// Same procedure for the per-user-quarter inpatient event aggregate
merge 1:1 enrolid quarter using "cholesterol_user_inpatient_events_quarter_aggregate", keep(master match) nogenerate
replace inpatient_events = 0 if inpatient_events == .


// NEW PART
// Attach the outpatient user aggregate file; keep every row already in memory
// and discard rows found only in the using file
merge 1:1 enrolid quarter using "cholesterol_user_outpatient_user_aggregate", keep(master match) nogenerate

// Missing spending amounts are treated as zero
foreach v of varlist inpatient_spending outpatient_spending prescription_spending non_cholesterol_pay {
	replace `v' = 0 if `v' == .
}


// Prescription spending net of the non-cholesterol portion
gen cholesterol_pay = prescription_spending - non_cholesterol_pay

// Total spending across the three categories, and its log(1 + x) transform
gen tot_spending = inpatient_spending + outpatient_spending + prescription_spending
gen log_tot_spending = log(1 + tot_spending)

// log(1 + x) of inpatient + outpatient + non-cholesterol prescription spending
gen log_other_spending = log(1 + inpatient_spending + outpatient_spending + non_cholesterol_pay)

// log(1 + x) of cholesterol prescription spending
gen log_cholesterol_spending = log(1 + cholesterol_pay)

// END NEW PART



// add on interaction variables

// Enrollee-level attributes (age, sex, emprel, plantyp). Only rows that match
// in both files survive.
merge m:1 enrolid using "all_enrolid_info", keep(match) nogenerate

// Indicator equal to 1 when sex is the string "2", 0 otherwise
gen female = sex == "2"

// create some variables fixed at start time
// For each source variable, record its period-0 value and carry that value
// forward row by row within the enrollee:
//   inpatient_events -> inpatient_start, non_cholesterol -> other_drugs_start
sort enrolid period
local sources inpatient_events non_cholesterol
local targets inpatient_start other_drugs_start
forvalues k = 1/2 {
	local src : word `k' of `sources'
	local tgt : word `k' of `targets'
	gen `tgt' = `src' if period == 0
	// within an enrollee, copy the previous row's value whenever it is non-missing
	by enrolid: replace `tgt' = `tgt'[_n-1] if _n > 1 & `tgt'[_n-1] != .
}



// some summary stats for what people look like when first starting
summarize non_cholesterol if period == 0, detail
//summ heart_related if period == 0, detail

// Age at start: calendar year of the first observed quarter minus birth year
gen first_prescription_year = year(dofq(first_q))
gen start_age = first_prescription_year - dobyr
summarize start_age if period == 0, detail

// Frequency tables at the first observed quarter
foreach v of varlist female inpatient_events {
	tabulate `v' if period == 0
}

// winsorize and take logs (big tail)
// Cap other_drugs_start at its 99th percentile, then apply log(1 + x).
// Stata orders missing values above every number, so a bare "x > cap" test is
// also true when x is missing. The !missing() guards below keep missing values
// missing instead of silently recoding them to the cap.
summ other_drugs_start, detail
tempname p99_cap
scalar `p99_cap' = r(p99)
replace other_drugs_start = `p99_cap' if other_drugs_start > `p99_cap' & !missing(other_drugs_start)
replace other_drugs_start = log(1+other_drugs_start)

// Top-code start age at 64 (a few odd entries). A missing start_age (for
// example when dobyr is missing) stays missing rather than becoming 64.
replace start_age = 64 if start_age > 64 & !missing(start_age)


// addition
// Current age = start age + years since the first observed quarter.
// period already holds quarter - first_q, so period / 4 converts it to years.
gen curr_age = start_age + period / 4


// Remove intermediate first-choice variables
drop first_choice_temp first_choice period chose_initial



/////////////////////////////////////////////
// IDENTIFIED ANALYSIS: USING ENTRY EVENTS //
/////////////////////////////////////////////

// Attach the generic-name mapping by generic_id; only rows matched in both
// files are kept
merge m:1 generic_id using "generic_name_mapping", keep(match) nogenerate


// Prefix the current-quarter drug attributes with curr_. The unprefixed names
// are referenced again after the joinby below, so presumably the joined file
// supplies its own generic_id / generic / xr -- verify against that file.
foreach v in generic_id gennme generic xr {
	rename `v' curr_`v'
}
drop drugid

// user is in the dataset only based on first use
// Pair every row with every treatment-indicator row that shares its enrolid
// and indic_id
joinby enrolid indic_id using "user_treatment_indicators_cholesterol"


// filter out irrelevant entries
// 1) not either treatment or control for that entry event
drop if treat != 1 & control != 1
// rows flagged as control are forced to treat = 0
replace treat = 0 if control == 1

// 2) quarter came before entry quarter
// (the filter itself is commented out, so pre-entry quarters are retained)
gen entry_quarter = qofd(entry_date)
gen rel_quart = quarter - entry_quarter
//keep if rel_quart >= 0

// A) generate the outcomes
// 1 when the current-quarter drug equals the entry event's drug on all three
// attributes (generic_id, generic, xr)
gen chose_entry_drug = (curr_generic_id == generic_id) ///
	& (curr_generic == generic) ///
	& (curr_xr == xr)
tabulate chose_entry_drug


// B) analyze


// Does this need correcting?
// chose_entry_drug as of the enrollee's first observed quarter, then spread to
// every row of the same enrollee and entry event
gen initial = chose_entry_drug if quarter == first_q  // rel_quart == 0 (old code; this is probably cleaner)
egen initial_choice = max(initial), by(enrolid entry_num) // unnecessary? maybe only 1 entry_num per enrolid




//////////////
// BIG LOOP //
//////////////

// For each entry event (entry_num 1..17) and each outcome, plot the mean
// outcome by quarter relative to drug entry, separately for treat == 1 and
// treat == 0, and write the figure as both .pdf and .gph.
//
// The dataset is preserved and collapsed once per entry event, with all
// outcomes collapsed together. Each outcome's mean is computed independently
// of the others, so the figures match a one-outcome-at-a-time collapse.

rename cholesterol_pay chol_pay

local outcomes chol_pay inpatient_events log_tot_spending log_other_spending

forvalues i=1/17 {

	// collapse aborts with r(2000) on an empty dataset; skip entry events
	// that have no rows instead of stopping the whole run
	quietly count if entry_num == `i'
	if r(N) == 0 {
		display as text "entry_num `i': no observations, skipping graphs"
		continue
	}

	preserve

	keep if entry_num == `i' // look at branded entries

	// mean of every outcome within each (treat, rel_quart) cell
	collapse (mean) `outcomes', by(treat rel_quart)

	foreach v of local outcomes {

		// graph to illustrate difference
		twoway (line `v' rel_quart if treat == 1 & rel_quart > -5 & rel_quart <= 16) ///
		(line `v' rel_quart if treat == 0 & rel_quart > -5 & rel_quart <= 16), ///
		xtitle("Quarter Relative to Drug Entry") ytitle("") legend(label(1 "Diagnosed After (Treated)") label(2 "Diagnosed Before (Control)"))

		graph export "event_study_results_`v'_basic_`i'.pdf", replace
		graph save "event_study_results_`v'_basic_`i'", replace

	}

	restore

}